import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.express as px
import ipywidgets as widgets
from jupyter_dash import JupyterDash
from dash import html, dcc
from dash.dependencies import Input, Output
pd.options.display.max_colwidth = 200
pd.set_option('display.max_columns', None)
df= pd.read_csv("data/compustat_from_1990.csv")
df['sic2']= (df['sich']/100).astype(int)
df=df[['conm', 'gvkey', 'tic', 'fyear', 'at', 'lt', 'lev', 'ni', 'sale', 'mv', 'sic2', 'oancf']]
df['lag_at']= df.groupby('gvkey')['at'].shift(1)
df['ROA']= df['ni'] / df['lag_at']
df['CFO']= df['oancf'] / df['lag_at']
df['accruals']= (df['ni']-df['oancf']) / df['lag_at']
print(df.shape)
df.head(3)
(211989, 16)
| conm | gvkey | tic | fyear | at | lt | lev | ni | sale | mv | sic2 | oancf | lag_at | ROA | CFO | accruals | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAR CORP | 1004 | AIR | 1990 | 379.958 | 186.180 | 0.490002 | 14.801 | 466.542 | 224.460360 | 50 | 36.891 | NaN | NaN | NaN | NaN |
| 1 | AAR CORP | 1004 | AIR | 1991 | 395.351 | 198.614 | 0.502374 | 10.020 | 422.657 | 204.699595 | 50 | 8.814 | 379.958 | 0.026371 | 0.023197 | 0.003174 |
| 2 | AAR CORP | 1004 | AIR | 1992 | 365.151 | 175.935 | 0.481814 | 0.283 | 382.780 | 214.663477 | 50 | 16.806 | 395.351 | 0.000716 | 0.042509 | -0.041793 |
df.loc[df.sic2 <= 9, 'industry']= 'Agriculture, Forest, Fishing'
df.loc[(df.sic2 >= 10) & (df.sic2 <= 14), 'industry']= 'Mining'
df.loc[(df.sic2 >= 15) & (df.sic2 <= 17), 'industry']= 'Construction'
df.loc[(df.sic2 >= 20) & (df.sic2 <= 39), 'industry']= 'Manufacturing'
df.loc[(df.sic2 >= 40) & (df.sic2 <= 49), 'industry']= 'Trans. & Public Utilities'
df.loc[(df.sic2 >= 50) & (df.sic2 <= 51), 'industry']= 'Wholesale Trade'
df.loc[(df.sic2 >= 52) & (df.sic2 <= 59), 'industry']= 'Retail Trade'
df.loc[(df.sic2 >= 60) & (df.sic2 <= 67), 'industry']= 'Finance, Insurance, Real Estate'
df.loc[(df.sic2 >= 70) & (df.sic2 <= 89), 'industry']= 'Services'
df.loc[(df.sic2 >= 91) & (df.sic2 <= 99), 'industry']= 'Public Administration'
df.dropna(inplace=True)
df= df[df['ROA'].abs()<0.5]
df= df[df['sale']>0]
df= df[df['at']>0]
df= df[df['mv']>0]
df['log_at']=np.log10(df['at'])
df['log_sale']=np.log10(df['sale'])
df['log_mv']=np.log10(df['mv'])
df.shape
(140198, 20)
df=df[df.fyear>=1995]
df_N= df.groupby('gvkey').agg({'gvkey':'count', 'industry':'nunique'}).rename(columns={'gvkey':'N', 'industry':'ind_count'}).reset_index()
df_N.head()
| gvkey | N | ind_count | |
|---|---|---|---|
| 0 | 1004 | 26 | 1 |
| 1 | 1013 | 15 | 1 |
| 2 | 1019 | 7 | 1 |
| 3 | 1021 | 14 | 1 |
| 4 | 1034 | 13 | 1 |
df_to_filter= df_N[(df_N.ind_count==1) & (df_N.N==df_N.N.max())]
balance_gvkey_list= list(df_to_filter.gvkey)
len(balance_gvkey_list)
794
df= df[df.gvkey.isin(balance_gvkey_list)]
df.shape
(20644, 20)
df.fyear.value_counts().sort_index().plot(kind='bar')
print(f"Unique firms: {df.gvkey.nunique()}")
plt.show()
Unique firms: 794
Balanced panel is confirmed.
df[df.fyear==2020].value_counts('industry')
industry Manufacturing 456 Trans. & Public Utilities 117 Services 80 Retail Trade 56 Mining 36 Wholesale Trade 32 Construction 16 Agriculture, Forest, Fishing 1 dtype: int64
df=df[df['industry'].isin(['Services', 'Retail Trade', 'Manufacturing'])]
df_2020= df[df.fyear==2020]
gvkey_100_list= list(df_2020.sort_values('mv', ascending= False).head(100)['gvkey'])
df=df[df.gvkey.isin(gvkey_100_list)]
df.shape
(2600, 20)
df[['at','lt','mv','sale','ni']]= df[['at','lt','mv','sale','ni']]*1000*1000
df[['ROA', 'CFO', 'accruals']].describe()
| ROA | CFO | accruals | |
|---|---|---|---|
| count | 2600.000000 | 2600.000000 | 2600.000000 |
| mean | 0.093176 | 0.143236 | -0.050060 |
| std | 0.075244 | 0.078937 | 0.055847 |
| min | -0.478937 | -0.149743 | -0.565054 |
| 25% | 0.052462 | 0.092544 | -0.072813 |
| 50% | 0.085562 | 0.131595 | -0.046237 |
| 75% | 0.131053 | 0.181850 | -0.024897 |
| max | 0.444756 | 0.553114 | 0.321131 |
xcol='sale'
ycol='ROA'
xcol_label= 'SALES'
ycol_label= 'ROA'
bubble_size_base= 'mv'
df['bubble_size']= df[bubble_size_base].clip(df[bubble_size_base].quantile(0.1),)
bubble_size_label= 'market value'
fig=px.scatter(df, x=xcol, y=ycol,
animation_frame='fyear',
animation_group='conm',
height= 600, width= 800,
range_x=[min(df[xcol]), max(df[xcol])],
range_y=[min(df[ycol]), max(df[ycol])],
size='bubble_size',
size_max= 50,
color='industry',
log_x= True,
log_y= False,
hover_name= 'conm',
labels={xcol:xcol_label, ycol:ycol_label, 'industry':''}
)
# global set-up
fig.update_layout(
font_family="Times New Roman",
font_size=15)
# title set-up
fig.update_layout(
title={
'text': f"<b>Dynamic scatter plot: {xcol_label} & {ycol_label} with {bubble_size_label} as a bubble size</b>",
'font_size':25,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
legend=dict(yanchor='top',
y=0.99,
xanchor='left',
x=0.01,
font_size=13,
))
fig.show()
fig.write_html('../_includes/dynamic-scatter-plot.html', auto_play= False)
app = JupyterDash(__name__)
app.layout= html.Div([
html.H1('Dynamic Scatter Plots with market value as a bubble size'),
html.Div([dcc.Dropdown(placeholder="x-variable:", id= "xvar-dropdown", value= 'log_at', options= ['at', 'sale','log_at', 'log_sale']),
dcc.Dropdown(id= "yvar-dropdown", value= 'log_mv', options= ['mv', 'log_mv'])],
style= {'width':'30%'}),
dcc.Graph(id='graph')
])
@app.callback(
Output("graph", "figure"),
[Input("xvar-dropdown", "value"), Input("yvar-dropdown", "value")]
)
def scatter_plot(xvar, yvar):
xcol= xvar
ycol= yvar
# Define size variable to be used as a bubble size
bubble_size_base= 'mv'
fig=px.scatter(df, x=xcol, y=ycol,
animation_frame='fyear',
animation_group='conm',
height= 600, width= 800,
range_x=[min(df[xcol]), max(df[xcol])],
range_y=[min(df[ycol]), max(df[ycol])],
size='bubble_size',
size_max= 50,
color='industry',
hover_name= 'conm',
labels={'industry':''}
)
# global set-up
fig.update_layout(
font_family="Times New Roman",
font_size=15)
# title set-up
fig.update_layout(
title={
'text': f"<b>Dynamic Scatter Plots: {xcol} & {ycol} with {bubble_size_base} as a bubble size</b>",
'font_size':25,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
legend=dict(yanchor='top',
y=0.99,
xanchor='left',
x=0.01,
font_size=13,
))
return fig
app.run_server(mode='inline')
! jupyter nbconvert compustat-bubble-plot-animation.ipynb --to html